/*
This makes the exact same dataset as the do-file "05_creating_project_specific_data"
except that observations with work distances below 5 kilometers are kept in the sample.
*/

/* === Shortest distance from each grunnkrets to Bergen municipality (1201) ===
   The distance file holds pairs (grk1, grk2) with a distance s. Each pair is
   read in both directions (presumably because a pair is stored only once in
   the file - confirm), and for every grunnkrets only the shortest distance to
   a grunnkrets inside the municipality is kept.
   Output: tempfile bergen with variables grk and distToCCBergen.             */

/* Direction 1: grk1 is the origin, grk2 lies inside the municipality */
use grk1 grk2 s using "${additionaldata}distances", clear
keep if floor(grk2 / 10000) == 1201 /* first four digits = municipality code */
keep grk1 s
rename grk1 grk
rename s dist
replace dist = dist / 1000 /* km, per the original note */
bysort grk (dist): keep if _n == 1 /* shortest distance per origin */
tempfile bergen_pass1
save `bergen_pass1'

/* Direction 2: grk2 is the origin, grk1 lies inside the municipality */
use grk1 grk2 s using "${additionaldata}distances", clear
keep if floor(grk1 / 10000) == 1201
keep grk2 s
rename grk2 grk
rename s dist
replace dist = dist / 1000
bysort grk (dist): keep if _n == 1

/* Stack both directions and keep the overall minimum per grunnkrets */
append using `bergen_pass1'
bysort grk (dist): keep if _n == 1
drop if dist == .
rename dist distToCCBergen

tempfile bergen
save `bergen', replace

/* === Shortest distance from each grunnkrets to Stavanger municipality (1103)
   Same construction as for the other cities: the (grk1, grk2, s) pairs are
   read in both directions and the shortest distance per grunnkrets is kept.
   Output: tempfile stavanger with variables grk and distToCCStavanger.       */

/* Direction 1: grk1 is the origin, grk2 lies inside the municipality */
use grk1 grk2 s using "${additionaldata}distances", clear
keep if floor(grk2 / 10000) == 1103 /* first four digits = municipality code */
keep grk1 s
rename grk1 grk
rename s dist
replace dist = dist / 1000 /* km, per the original note */
bysort grk (dist): keep if _n == 1 /* shortest distance per origin */
tempfile stavanger_pass1
save `stavanger_pass1'

/* Direction 2: grk2 is the origin, grk1 lies inside the municipality */
use grk1 grk2 s using "${additionaldata}distances", clear
keep if floor(grk1 / 10000) == 1103
keep grk2 s
rename grk2 grk
rename s dist
replace dist = dist / 1000
bysort grk (dist): keep if _n == 1

/* Stack both directions and keep the overall minimum per grunnkrets */
append using `stavanger_pass1'
bysort grk (dist): keep if _n == 1
drop if dist == .
rename dist distToCCStavanger

tempfile stavanger
save `stavanger'

/* === Shortest distance from each grunnkrets to Haugesund municipality (1106)
   Same construction as for the other cities: the (grk1, grk2, s) pairs are
   read in both directions and the shortest distance per grunnkrets is kept.
   Output: tempfile haugesund with variables grk and distToCCHaugesund.       */

/* Direction 1: grk1 is the origin, grk2 lies inside the municipality */
use grk1 grk2 s using "${additionaldata}distances", clear
keep if floor(grk2 / 10000) == 1106 /* first four digits = municipality code */
keep grk1 s
rename grk1 grk
rename s dist
replace dist = dist / 1000 /* km, per the original note */
bysort grk (dist): keep if _n == 1 /* shortest distance per origin */
tempfile haugesund_pass1
save `haugesund_pass1'

/* Direction 2: grk2 is the origin, grk1 lies inside the municipality */
use grk1 grk2 s using "${additionaldata}distances", clear
keep if floor(grk1 / 10000) == 1106
keep grk2 s
rename grk2 grk
rename s dist
replace dist = dist / 1000
bysort grk (dist): keep if _n == 1

/* Stack both directions and keep the overall minimum per grunnkrets */
append using `haugesund_pass1'
bysort grk (dist): keep if _n == 1
drop if dist == .
rename dist distToCCHaugesund

tempfile haugesund
save `haugesund'

/* === Shortest distance from each grunnkrets to Kristiansand municipality (1001)
   Same construction as for the other cities: the (grk1, grk2, s) pairs are
   read in both directions and the shortest distance per grunnkrets is kept.
   Output: tempfile kristiansand with variables grk and distToCCKristiansand. */

/* Direction 1: grk1 is the origin, grk2 lies inside the municipality */
use grk1 grk2 s using "${additionaldata}distances", clear
keep if floor(grk2 / 10000) == 1001 /* first four digits = municipality code */
keep grk1 s
rename grk1 grk
rename s dist
replace dist = dist / 1000 /* km, per the original note */
bysort grk (dist): keep if _n == 1 /* shortest distance per origin */
tempfile kristiansand_pass1
save `kristiansand_pass1'

/* Direction 2: grk2 is the origin, grk1 lies inside the municipality */
use grk1 grk2 s using "${additionaldata}distances", clear
keep if floor(grk1 / 10000) == 1001
keep grk2 s
rename grk2 grk
rename s dist
replace dist = dist / 1000
bysort grk (dist): keep if _n == 1

/* Stack both directions and keep the overall minimum per grunnkrets */
append using `kristiansand_pass1'
bysort grk (dist): keep if _n == 1
drop if dist == .
rename dist distToCCKristiansand

tempfile kristiansand
save `kristiansand'

/* === Open actual dataset ================================================== */

/* Household panel that the rest of the script builds on */
use "${newdata}hh_3_cars_endofyear.dta", clear

/* Treatment status is based on work distance, so we need:
     - an observed neighborhood of residence (original note: drops 195),
     - at least one employed household member, and
     - at least one observed work distance.                                   */
keep if grkrets != ""
keep if employed1 == 1 | employed2 == 1
keep if !(dist1 == . & dist2 == .)

/* Numerical variable for neighborhood of residence (merge key below) */
destring grkrets, gen(grk)

/* Attach the distance to each city; households without a match are kept */
foreach city in bergen stavanger haugesund kristiansand {
	merge m:1 grk using ``city'', keep(match master) nogen
}

/* Indicator, defined for 2014 rows only: does the household live within
   50 km of at least one of the four cities? min() skips missing distances,
   and an all-missing row yields missing, which fails the <= 50 test.         */
gen aux = (min(distToCCStavanger, distToCCBergen, ///
               distToCCHaugesund, distToCCKristiansand) <= 50) if year == 2014

/* Spread the 2014 status to every year of the same household */
bysort familienr: egen aux2 = mean(aux)

/* Keep households that were within 50 km in 2014. Households with no 2014
   row have aux2 missing and are dropped too.
   (Earlier variant, left disabled: drop if aux2 == 0)                        */
keep if aux2 == 1

/* Numerical workplace grunnkrets for household members 1 and 2 */
destring grk_bed1, gen(grk_work1)
destring grk_bed2, gen(grk_work2)

*gen fylke = floor(grk/1000000)
*gen kommune = floor(grk/10000)

/* Clean the map-based commute measures, person by person:
     - no workplace grunnkrets (not employed)  -> measure set to missing
     - works in the grunnkrets of residence and the measure is missing
                                               -> measure set to zero
   The two rules touch disjoint rows, so they can share one loop.             */
forvalues p = 1/2 {
	foreach measure in time dist toll ptl {
		replace `measure'`p' = . if grk_work`p' == .
		replace `measure'`p' = 0 if `measure'`p' == . ///
			& grk_work`p' != . & grk_work`p' == grk
	}
}

/* === Creating useful variables ============================================ */

* Household-level commute measures: the mean over both members when both are
* observed, otherwise whichever member is observed.
*   toll      <- toll1, toll2   (toll payments)
*   dist      <- dist1, dist2   (work distance)
*   time_work <- time1, time2   (commute time)
local targets toll dist time_work
local sources toll dist time
forvalues k = 1/3 {
	local new : word `k' of `targets'
	local src : word `k' of `sources'
	gen `new' = (`src'1 + `src'2) / 2
	replace `new' = `src'1 if `new' == .
	replace `new' = `src'2 if `new' == .
}

* A couple is a household where the age of both adult members is observed
capture drop couple
gen couple = !(age1 == . | age2 == .)

* Household averages with the same fallback rule; each new variable is placed
* immediately before its person-1 counterpart
foreach stub in age wies wealth employed retired kvinne {
	gen `stub' = (`stub'1 + `stub'2) / 2
	replace `stub' = `stub'1 if `stub' == . /* only person 1 observed */
	replace `stub' = `stub'2 if `stub' == . /* only person 2 observed */
	order `stub', before(`stub'1)
}

* For education we use the max rather than the average.
* max() skips missing arguments, so a household where only one member has an
* observed education level gets that member's level. The earlier comparison
* (grputd2 > grputd1) was false whenever grputd1 was missing, because Stata
* ranks missing above every number; such households were then coded unknown
* even though grputd2 was observed.
* NOTE(review): the header says this file should mirror the sibling do-file
* 05_creating_project_specific_data - apply the same fix there to keep the
* two datasets aligned.
* NOTE(review): a member coded 5 (unknown) still dominates a known level of
* the other member, as before - confirm whether that is intended.
gen max_educ = max(grputd1, grputd2)
replace max_educ = 0 if max_educ == . /* zero here refers to unknown */
replace max_educ = 0 if max_educ == 5 /* category 5 is also unknown */

* Work distance: only the upper limit applies in this file. The lower limit
* is deliberately left disabled (see the note at the top of the file).
keep if dist <= 50
*keep if dist >= 5
compress

* Outcome variables, built from the three car slots (fuel code 5 = BEV)
*   bev   : household has at least one BEV
*   nrbev : number of BEVs
*   cars  : number of cars (non-empty car id)
*   ice   : number of cars that are not BEVs
gen bev   = inlist(5, fuel1, fuel2, fuel3)
gen nrbev = (fuel1 == 5) + (fuel2 == 5) + (fuel3 == 5)
gen cars  = (carid1 != "") + (carid2 != "") + (carid3 != "")
gen ice   = (carid1 != "" & fuel1 != 5) ///
          + (carid2 != "" & fuel2 != 5) ///
          + (carid3 != "" & fuel3 != 5)

* mover = 1 when the grunnkrets of residence differs from the household's
* previous row (rows ordered by year within household)
bysort familienr (year): gen mover = ///
	(grk != grk[_n-1] & familienr == familienr[_n-1])

* Dropping car data we don't need, plus helper variables if they exist
drop carid* car_acq* kmperday* daysused* daysowned* freg_DMY* fuel*
foreach helper in aux aux2 {
	capture drop `helper'
}
compress
/* === Definition of treatment status ======================================= */

/* City indicators: residence within 50 km of the city */
foreach city in Bergen Stavanger Haugesund Kristiansand {
	local indicator = lower("`city'")
	gen `indicator' = (distToCC`city' <= 50)
}

/* Definition:
	- Must be 2014
	- Must live within 50 kms of either city, and
	- Paying commuter: at least one member's toll payment equals the rate in
	  the toll cordon, or double that rate (driving past two toll gates; due
	  to the hour rule, actual toll payment is the same).
	- Non-paying commuter: household toll payment equals zero
   Rates: Bergen 25, Stavanger 20, Haugesund 14, Kristiansand 21.
   Groups are assigned in priority order 1, 2, 3, 4; the first match wins.
*/
local pay_bergen       inlist(toll1, 25, 50) | inlist(toll2, 25, 50)
local pay_stavanger    inlist(toll1, 20, 40) | inlist(toll2, 20, 40)
local pay_haugesund    inlist(toll1, 14, 28) | inlist(toll2, 14, 28)
local pay_kristiansand inlist(toll1, 21, 42) | inlist(toll2, 21, 42)

gen group = cond(bergen == 1 & (`pay_bergen'), 1, ///
            cond(bergen == 1 & toll == 0, 2, ///
            cond((stavanger == 1 & (`pay_stavanger')) ///
               | (haugesund == 1 & (`pay_haugesund')) ///
               | (kristiansand == 1 & (`pay_kristiansand')), 3, ///
            cond((stavanger == 1 | haugesund == 1 | kristiansand == 1) ///
               & toll == 0, 4, .)))) if year == 2014

* Extrapolating the 2014 treatment status to all years of the household
tempvar fam_group
bysort familienr: egen `fam_group' = mean(group)
replace group = `fam_group' if group == .
drop `fam_group'

label define trmlabel ///
	1 "Bergen, commuter" ///
	2 "Bergen, non-commuter" ///
	3 "Other city, commuter" ///
	4 "Other city, non-commuter"
label values group trmlabel

/* Shrink storage types and write the final dataset. The file name is quoted,
   like the use statements earlier in this file, so that a newdata directory
   whose path contains spaces is still parsed as a single file name.          */
compress
save "${newdata}carownership_dataset_bergen_shortwd.dta", replace
